# =============================================================================
# FILE 5. POLLING DATA, FLANDERS (2019 & 2024)
# =============================================================================
# PURPOSE:
#   Produce a high-resolution time-series figure of Flemish party vote intention
#   (scatter + LOESS smooth), with key reference dates (elections / year markers)
#   and highlighted election-result points.
#
# INPUTS:
#   - polls_flanders_2024.xlsx (sheet: "regional")
#
# OUTPUTS:
#   - polls_fl_2019_2024.png  (7x4 inches @ 1200 dpi)
# =============================================================================

################################################################################
# 1. PACKAGES, WORKING DIRECTORY + FILE LOCATIONS
################################################################################

# Packages whose functions are called below:
#   readxl    -> read_excel()
#   ggplot2   -> ggplot(), geom_*(), scale_*(), theme*(), unit(), margin()
#   lubridate -> ymd()
# Attaching them here keeps the script runnable on its own; if an earlier
# script in the project already attached them, these calls are no-ops.
library(readxl)
library(ggplot2)
library(lubridate)

# Set project directory (update it to your own directory that contains the
# input files). The explicit check turns a forgotten placeholder into a clear
# error instead of R's generic "cannot change working directory".
project_dir <- "updated_path_here"
if (!dir.exists(project_dir)) {
  stop(
    "Project directory not found: '", project_dir, "'. ",
    "Edit `project_dir` so it points to the folder with the input files.",
    call. = FALSE
  )
}
setwd(project_dir)

# Report the active directory; message() is shown even when the script is
# source()d, where a bare getwd() would print nothing.
message("Working directory: ", getwd())

# Input/output file names are defined once here so they are easy to change in
# one place.
input_xlsx  <- "polls_flanders_2024.xlsx"
input_sheet <- "regional"
output_png  <- "polls_fl_2019_2024.png"

# Fail early, with the file name, if the workbook is not where we expect it.
if (!file.exists(input_xlsx)) {
  stop(
    "Input file '", input_xlsx, "' not found in ", getwd(), ".",
    call. = FALSE
  )
}


################################################################################
# 2. LOAD & PREP DATA
################################################################################

# Import the regional polling sheet from the workbook
polls.fl <- read_excel(path = input_xlsx, sheet = input_sheet)

# scale_x_date() needs a Date column, so convert the poll date
polls.fl$date <- as.Date(polls.fl$date)

# Force every vote-share column to numeric in one pass, so ggplot treats them
# as continuous values (character/factor columns would break the scatter and
# the LOESS smoothing).
party_cols <- c("nva", "vb", "cdv", "vld", "vooruit", "groen", "pvda", "others")
polls.fl[party_cols] <- lapply(polls.fl[party_cols], as.numeric)

# Party palette: a named character vector (party key -> colour) shared by all
# layers. The keys must be the same strings that the geom_smooth() layers map
# to the colour aesthetic ("nva", "vb", ...).
colors.fl <- setNames(
  c("#FCBD1B", "black", "#F58216", "#AC312A", "#005DAA", "#ff6961", "#C5E908"),
  c("nva",     "vb",    "cdv",     "pvda",    "vld",     "vooruit", "groen")
)

# X-axis reference dates: the two election days plus 1 January of each year in
# between. The same vector supplies the tick positions and the tick labels.
specific_dates <- c(
  as.Date("2019-05-26"),                  # 2019 election
  as.Date(paste0(2020:2024, "-01-01")),   # yearly markers
  as.Date("2024-06-09")                   # 2024 election
)

# Start the PNG device that will receive the figure: 7 x 4 inches at 1200 dpi.
# The device stays open until dev.off() is called at the end of the script.
png(filename = output_png, width = 7, height = 4, units = "in", res = 1200)


################################################################################
# 3. BASE FIGURE: SCATTER + LOESS SMOOTHS
################################################################################

# Party keys in drawing order. They are taken from the shared palette so the
# layers, their colours and the legend cannot drift apart.
party_keys <- names(colors.fl)

# Legend entries: breaks and labels are paired explicitly instead of relying on
# ggplot2 sorting the keys alphabetically to line up with the label vector.
legend_breaks <- c("cdv",  "groen", "nva",  "pvda", "vb", "vld",      "vooruit")
legend_labels <- c("CD&V", "Groen", "N-VA", "PVDA", "VB", "Open Vld", "Vooruit")

# --- Raw poll points (scatter), one layer per party ---
# alpha = 1/4 makes points mostly transparent to reduce overplotting.
# `!!` injects the column name into aes() immediately, so each layer keeps its
# own party even though aes() is otherwise evaluated lazily at draw time.
point_layers <- lapply(party_keys, function(party_key) {
  geom_point(
    aes(y = .data[[!!party_key]]),
    color = colors.fl[[party_key]],
    alpha = 1/4
  )
})

# --- LOESS trend lines, one layer per party ---
# - se = FALSE removes the confidence band for a cleaner figure
# - span controls smoothness (smaller = wigglier)
# - color is mapped to the party key string, then resolved by
#   scale_color_manual() below (this is what creates the legend)
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept so the script also runs on older installations.
smooth_layers <- lapply(party_keys, function(party_key) {
  geom_smooth(
    aes(y = .data[[!!party_key]], color = !!party_key),
    se = FALSE, method = loess, span = 0.1, size = .75
  )
})

# Initialize plot with shared x aesthetic (date), then add the layers and the
# colour scale / legend definition.
polls.fl.g <- ggplot(polls.fl, aes(x = date)) +
  point_layers +
  smooth_layers +
  labs(color = "") +
  scale_color_manual(
    breaks = legend_breaks,
    labels = legend_labels,
    values = colors.fl
  ) +
  guides(color = guide_legend(ncol = 1, byrow = TRUE))


################################################################################
# 4. AXES, SCALES, AND OVERALL THEME
################################################################################

polls.fl.g <- polls.fl.g +
  # X axis title intentionally blank; the y title comes from the scale name.
  xlab("") +

  # X-axis: fixed breaks at specific_dates with stacked month/year labels
  scale_x_date(
    breaks = specific_dates,
    guide  = guide_axis(angle = 90),
    labels = format(specific_dates, "%b\n%Y")
  ) +

  # Y-axis: percentage scale, fixed limits and consistent tick spacing
  scale_y_continuous(
    "Percentage",
    limits = c(0, 30),
    breaks = seq(0, 30, 5),
    expand = expansion(mult = c(0.03, 0.03))
  ) +

  # theme_minimal() is a complete theme: adding it replaces every theme setting
  # made before it. It therefore has to come first, and ALL overrides
  # (including the legend ones) follow in a single theme() call. Previously the
  # legend settings were applied before theme_minimal() and silently discarded.
  # If the default-sized legend is preferred, delete the legend.* lines.
  theme_minimal() +
  theme(
    plot.title         = element_blank(),
    panel.grid.minor   = element_blank(),
    axis.title         = element_text(size = 10),
    axis.text          = element_text(size = 9),
    axis.ticks         = element_blank(),
    axis.title.y       = element_text(size = 10, margin = margin(r = 8)),
    legend.box.just    = "right",
    legend.margin      = margin(0, 0, 0, 0),
    legend.background  = element_rect(fill = "NA"),
    legend.text        = element_text(size = 14),
    legend.key.size    = unit(.8, "cm")
  )


################################################################################
# 5. HIGHLIGHT KEY REFERENCE POINTS (ELECTION RESULTS / ANCHOR POLLS)
################################################################################

# Strategy:
# - Each highlighted value gets a small filled dot AND an open diamond
#   (shape = 5) around it, forming a "callout" marker that stands out from the
#   raw poll points.
# - The reference values live in their own small data frame and are drawn with
#   `data = ...` and `inherit.aes = FALSE`. Writing constants inside aes() on a
#   layer that inherits polls.fl would redraw every marker once per poll row.
# - Colours are looked up in colors.fl so markers always match the trend lines.

election_results <- data.frame(
  party = rep(c("nva", "vb", "cdv", "pvda", "vld", "vooruit", "groen"), times = 2),
  date  = as.Date(c(
    # ---- June 2024 ----
    # NOTE(review): the PVDA marker uses 2024-05-20 instead of 2024-06-09.
    # Presumably this offsets it from the near-identical Open Vld value
    # (8.31 vs 8.33) so both remain visible; confirm this is intentional.
    "2024-06-09", "2024-06-09", "2024-06-09", "2024-05-20",
    "2024-06-09", "2024-06-09", "2024-06-09",
    # ---- May 2019 (previous election) ----
    rep("2019-05-26", 7)
  )),
  share = c(
    23.88, 22.66, 13.04, 8.31, 8.33, 13.85, 7.29,   # 2024
    24.83, 18.50, 15.40, 5.32, 13.13, 10.35, 10.11  # 2019
  ),
  stringsAsFactors = FALSE
)

# Resolve each party key to its palette colour; stop if a key has no colour,
# because ggplot2 would otherwise silently drop or mis-colour the marker.
election_results$colour <- unname(colors.fl[election_results$party])
stopifnot(!anyNA(election_results$colour))

polls.fl.g <- polls.fl.g +
  # Filled centre dot of each callout marker
  geom_point(
    data = election_results,
    aes(x = date, y = share),
    colour = election_results$colour,
    size = 1.5,
    inherit.aes = FALSE
  ) +
  # Open diamond outline around each dot
  geom_point(
    data = election_results,
    aes(x = date, y = share),
    shape = 5,
    colour = election_results$colour,
    size = 3,
    stroke = 1,
    inherit.aes = FALSE
  )


################################################################################
# 6. RENDER & SAVE
################################################################################

# Draw the plot on the open PNG device. The explicit print() is required:
# a bare `polls.fl.g` is only auto-printed at the interactive top level, so
# under source() (or inside a function/loop) nothing would be drawn and the
# PNG would come out empty.
print(polls.fl.g)

# Close the device to flush the PNG to disk; invisible() suppresses the
# "null device" value that dev.off() returns.
invisible(dev.off())

# =============================================================================
# END OF SCRIPT
# =============================================================================